package com.agan.aganjavaspyder.crawler.ct_demo;

import com.geccocrawler.gecco.GeccoEngine;
import com.geccocrawler.gecco.annotation.*;
import com.geccocrawler.gecco.request.HttpRequest;
import com.geccocrawler.gecco.spider.HrefBean;
import com.geccocrawler.gecco.spider.HtmlBean;
import lombok.Getter;
import lombok.Setter;

import java.util.List;

/**
 * Gecco bean bound to the 189.cn Tianyi Mall search URL declared in {@code matchUrl}.
 *
 * <p>The bean captures the originating request, the three URL template parameters
 * ({@code searchtext}, {@code pageSize}, {@code pageNum}) and the content selected by the
 * {@code script} CSS path. Results are handed to the {@code consolePipeline} and
 * {@code CtPhonePipeline} pipelines. The class also carries a {@link #main(String[])}
 * entry point that launches the crawl.
 *
 * @author agan
 * @ClassName StartCtPhone
 * @Description
 * @Date 2020/10/15 9:11 PM
 **/
@Getter
@Setter
@Gecco(
        matchUrl = "http://www.189.cn/dqmh/tianyiMall/searchMallAction.do?method=goToSearch&searchtext={searchtext}&pageSize={pageSize}&internal_search=1&shopId=10001&currentPage={pageNum}&listType=0",
        pipelines = {"consolePipeline", "CtPhonePipeline"})
public class StartCtPhone implements HtmlBean {

    /** Package path that Gecco scans. */
    private static final String SCAN_PACKAGE = "com.agan.aganjavaspyder.crawler";

    /**
     * Address of the first page to crawl.
     *
     * <p>NOTE(review): the {@code searchtext} value is written as raw non-ASCII text rather
     * than percent-encoded - confirm the HTTP client in use handles this as intended.
     */
    private static final String SEED_URL =
            "http://www.189.cn/dqmh/tianyiMall/searchMallAction.do?method=goToSearch&searchtext=手机&pageSize=16&internal_search=1&shopId=10001&currentPage=1&listType=0";

    /** Number of crawler threads to start. */
    private static final int CRAWLER_THREADS = 1;

    /**
     * Pause a single crawler takes after finishing each request.
     * Presumably expressed in milliseconds - TODO confirm against the Gecco documentation.
     */
    private static final int REQUEST_INTERVAL = 2000;

    /** The request that produced this bean. */
    @Request
    private HttpRequest request;

    /** Value of the {@code {searchtext}} placeholder in the matched URL. */
    @RequestParameter("searchtext")
    private String searchtext;

    /** Value of the {@code {pageSize}} placeholder in the matched URL. */
    @RequestParameter("pageSize")
    private int pageSize;

    /**
     * Value of the {@code {pageNum}} placeholder, which occupies the {@code currentPage}
     * query parameter of the matched URL.
     */
    @RequestParameter("pageNum")
    private int pageNum;

    /**
     * Content selected by the {@code script} CSS path; per the original author's note the
     * data of interest is obtained from the page's JS script.
     */
    @Html
    @HtmlField(cssPath = "script")
    private String resultText;

    /**
     * Configures a Gecco engine with the settings above, enables debug mode and starts it.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        GeccoEngine.create()
                .classpath(SCAN_PACKAGE)
                .seed(SEED_URL)
                .thread(CRAWLER_THREADS)
                .debug(true)
                .interval(REQUEST_INTERVAL)
                .start();
    }
}
